********************************************************************************
** Summary Statistics, Mortgagor
** Builds a quarterly (durables) and a monthly (nondurables) mortgagor sample,
** stacks them, and writes a table of summary statistics.
********************************************************************************
* Empty the session: data, matrices, stored estimates, graphs and open logs
clear
clear matrix
estimates clear
graph drop _all
log close _all

* Session settings
set more off
set scheme s1color
set matsize 2500

* Working directory: every path below is written relative to the Do folder
cd "../Do"

********************************************************************************
** LOAD AND CLEAN DATA: DURABLES
********************************************************************************

use "../Data/matched_data_durables_jun2018_baseline.dta", clear

* Three composite durables-spending measures: nominal; "real" using a single
* (appliances) CPI; and "real" deflated good by good (done in the previous merge file)
label variable durables       "nominal durable goods spending"
label variable durables_real1 "real durable goods spending, single deflator"
label variable durables_real2 "real durable goods spending, separate deflators"
drop ethnicity

* Code 5 becomes 0 on these four items (presumably 5 = "no"; confirm with the codebook)
recode mort stocks retacct howner (5=0)

* Expectations variables from the Inflation surveys.
* Values not listed in a recode rule (e.g. "other") are carried over unchanged,
* so they stay in their own category.

* Unemployment expectation dummies
recode q6 (1=1) (2 3 = 0), gen(unemp_increase)
recode q6 (3=1) (1 2 = 0), gen(unemp_decrease)

* 12-month conditions
gen conditions_12m = q2a
recode q2a (1=1) (2 3 = 0), gen(conditions_12m_better)
recode q2a (2=1) (1 3 = 0), gen(conditions_12m_worse)

* 12-month interest rate expectation
gen interestrate_12m = q7
recode interestrate_12m (1=1) (2 3 = 0), gen(intrate_12m_up)
recode interestrate_12m (3=1) (1 2 = 0), gen(intrate_12m_down)

* 12-month business conditions
gen bconditions_12m = q4
recode q4 (1=1) (2 3 = 0), gen(bconditions_12m_better)
recode q4 (2=1) (1 3 = 0), gen(bconditions_12m_worse)

* House price forecast: 0 when q41==3, +q42 when q41==1, -q42 when q41==2,
* missing otherwise
gen hppoint = cond(q41==3, 0, cond(q41==1, q42, cond(q41==2, -q42, .)))
* Blank out extreme outliers (some answers are in the tens of thousands)
replace hppoint = . if abs(hppoint)>=200

label variable hppoint "House price expectation"


*** Prepare some additional descriptives
* Race isn't reported in all periods: compute the within-person mean of race
* and merge it back onto every row of that respondent.
preserve
collapse (mean) race , by(prim_key)
sort prim_key
tempfile race
save `race'
restore

drop race
sort prim_key
merge m:1 prim_key using `race'
tab _merge
drop _merge
drop if prim_key==""

* white = 1 when the person-level race value is exactly 1, 0 for any other nonmissing value
recode race (1=1) (nonmissing = 0), gen(white)
gen nonwhite = 1-white
recode gender (2=1) (1=0), gen(female)
* College indicator. The rule previously read (4 9 = 0), which is a list of the
* two codes 4 and 9, not a range: every other code below 10 was copied through
* unchanged, so coll (and later 1-coll) was not a 0/1 dummy for those rows.
* min/9 sends every code up to 9 to 0.
* NOTE(review): assumes all codes below 10 mean "no college"; confirm with the codebook.
recode highesteducation (min/9 = 0) (10/16 = 1), gen(coll)

* The q31s variables hold employment-status codes, one variable per status.
* MB: odd to generate an employed variable, because everyone in the sample is
* employed; however some also say they are retired (?)
gen employed = . 
replace employed=1 if q31s1==1
replace employed = 0 if (q31s2==2 | q31s3==3 | q31s4==4 | q31s5==5 | q31s6==6 | q31s7==7) & q31s1!=1


* Currently-retired indicator (replaces the retired variable shipped with the data).
* Odd that some are retired even though all are employed; how do retired people
* have wage growth expectations? They may be working a small job anyway.
drop retired 
gen retired = . 
replace retired = 1 if q31s5==5
replace retired = 0 if (q31s1==1 | q31s2==2 | q31s3==3 | q31s4==4 | q31s6==6 | q31s7==7) & q31s5!=5
tab retired


* Gas price expectation (added December 2019 from Arman's old code):
* 0 when q47a==3, +q47a_higher when q47a==1, -q47a_lower when q47a==2, else missing
gen gas_expect = cond(q47a==3, 0, cond(q47a==1, q47a_higher, cond(q47a==2, -q47a_lower, .)))

* Variable lists used further down
local expectations intrate_12m_up intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr hppoint
local infl1 d_inflmedian d_infliqr
local infl2 d_longinflmedian d_longinfliqr

* MB: when using lags, the lagged variables or these locals may need renaming to match
* (disabled) local lags1 "lagE_d_inflmedian lagE_d_infliqr"
* (disabled) local lags2 "lagE_d_longinflmedian lagE_d_longinfliqr"

* Specification titles
local spec1 "Short-Run Infl. Exp."
local spec2 "Medium-Run Infl. Exp."

label variable d_inflmedian "Inflation Expectations"
label variable d_infliqr    "Inflation Uncertainty"

*MB resume: add "howner_fix" code ?

* (disabled) replace howner=0 if howner==.
* Author note: that recode adds ten people to homeowner status; there are still some with
* howner_fix==0 who have a positive mortgage payment but missing data for has-mortgage
* and/or mortgage amount.
* Two observations have howner==0 and mort==1 (say they're not a homeowner but say they
* have a mortgage); not recoded here.

* Drop rows that report no mortgage but a nonmissing mortgage balance above 100.
* NOTE(review): an older author note said such a person was being kept "for now";
* the line below removes them, so that note no longer matches the code.
drop if mort==0 & amtmort!=. & amtmort>100
* Impute the mortgage indicator where it is missing, from other information.
* Order matters: the first replace fills 0 for non-homeowners (including missing howner)
* or a zero mortgage payment; the second then fills 1 for everything still missing.
replace mort = 0 if (howner!=1 | mortgage==0) & mort==.
replace mort = 1 if (howner==1 | (mortgage>0 & mortgage!=.)) & mort==.

* Author notes: amtmort may still contradict the mort dummy (e.g. mort==0 with amtmort==0,
* where one might expect amtmort to be missing).
* There are 44 cases with a mortgage whose balance is missing; those are dropped later
* when the mortgage subsample is formed.

* The mortgage dummy is non-missing for over 2000 observations; the mortgage balance
* (amtmort) is observed for only 990 people.
* (disabled) tab mort
* JIMIN: we could check results for everyone with a non-missing mortgage dummy regardless
* of the balance being observed; the balance would have to be left out of the regression.

la var mort "Has Mortgage"

* MB: quasi-continuous income built from the midpoints of the annual household
* income brackets (familyincome and familyincome_part2)
rename familyincome       inc2
rename familyincome_part2 inc2_2
drop if inc2==.

* new_faminc: bracket midpoint in dollars. Brackets 1-13 come straight from inc2;
* the k-th entry of the list below is the midpoint assigned to inc2==k.
gen new_faminc=.
local midpoints 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500
local bracket = 0
foreach mid of local midpoints {
	local ++bracket
	replace new_faminc = `mid' if inc2 == `bracket'
}

* Bracket 14 is split further by inc2_2. One person has inc2==14 with inc2_2
* missing; they are assigned the lowest category above $75000.
replace new_faminc =  87500 if inc2==14 & (inc2_2==1 | inc2_2==.)
replace new_faminc = 112500 if inc2==14 & inc2_2==2
replace new_faminc = 162500 if inc2==14 & inc2_2==3
replace new_faminc = 237500 if inc2==14 & inc2_2==4

gen log_new_faminc = log(new_faminc)


* SAMPLE WEIGHTS for the quarterly data. Warning: observations may be lost if the set
* of people needing weights has changed; regressions on non-employed types can then
* only be run unweighted.
* weight_samp = weights designed for the regression sample; the full-sample weights
* are still called weight_full.
sort prim_key quarter
merge m:1 prim_key using ../Data/qweights_pooled
* Keep matched rows only (author's count: 134 observations had no weight_samp)
keep if _merge==3
drop _merge

*MB December 2019: key juncture: impose different sample restrictions and use different real/nominal durables
*********************
* Define the regression sample before recentering any variables: drop extreme values.
* JIMIN: try turning the following restrictions on and off.
* Reminder: in Stata a missing value compares as larger than any number, so an
* unguarded "x > cutoff" also removes rows where x is missing.

* Should drop the top 2 highest values of durables spending; I doubt it makes any difference.
* (Missing durables are removed here as well; durables==. is dropped again further down.)
drop if durables>20000  | durables_real1>20000 | durables_real2>20000
drop if prim_key=="5041140:1"
drop if mortgage>200000 & mortgage!=.
* Missing d_inflmedian is removed here as well; it is also dropped explicitly further down.
drop if d_inflmedian>35
* The long-run expectation is not otherwise required to be non-missing, so it is guarded:
* only genuine outliers are dropped, not rows where it is simply unobserved.
drop if d_longinflmedian>35 & !missing(d_longinflmedian)
drop if hppoint<-50
**end of optional restrictions 


* Locals for weights (can be turned on or off in a regression)
local weights "[pweight=weight_samp]"
local weights_full "[pweight=weight_full]"
local pwfile "_pw"

* Variable labels
la var d_inflmedian "Inflation Expectation"
la var d_infliqr "Inflation Uncertainty"
la var d_longinflmedian "Inflation Expectation"
la var d_longinfliqr "Inflation Uncertainty"
la var lag_IE "Lagged Inflation Expectation"
la var lag_infl_iqr "Lagged Infl. Uncertainty"
la var hppoint "House price expectation"
la var gas_expect "Gas price expectation"

* Sum of monthly payments: mortgage + car for homeowners, rent + car otherwise.
* Interactions between this variable and IE may be included in some models.
gen payments=mortgage+car if howner==1
replace payments=rent+car if howner!=1
* Log payments, with log(0) set to 0; stays missing when payments is missing or negative
gen log_payments=.
replace log_payments=log(payments) if payments>0
replace log_payments=0 if payments==0
* Drop extreme payment values (only relevant when interacting with payments; the author
* notes none are dropped here, unlike nondurables). Missing values compare as larger
* than any number in Stata, so the condition is guarded to avoid silently discarding
* rows whose payments are merely unobserved.
drop if payments>100000 & !missing(payments)

* Define the sample: remove rows with a (system) missing value in any variable below,
* checked in this order.
*  - lag_IE and lag_infl_iqr are only needed when lagged IE enters the regression
*  - new_faminc is the recode of familyincome / familyincome_part2; the former income
*    variable was earnings last month and highly unreliable
*  - hppoint costs 300+ observations and howner 111; results are robust to not imposing
*    these two and omitting hppoint from the regressions
foreach v in weight_samp d_inflmedian d_infliqr durables lag_IE lag_infl_iqr ///
		new_faminc intrate_12m_up intrate_12m_down unemp_increase unemp_decrease ///
		rw_expect d_wageiqr rage nonwhite female coll retired mort hppoint howner {
	drop if `v'==.
}


* Center inflation expectations (and the lagged variables) on the sample mean.
* The means are unweighted; per the author this doesn't matter because the weighted mean
* is within 0.04 ppts of the unweighted mean IE.
egen IE_sampmean=mean(d_inflmedian) 
* d_inflmedian2 keeps an uncentered copy of d_inflmedian
gen d_inflmedian2= d_inflmedian
replace d_inflmedian=d_inflmedian-IE_sampmean
egen lag_IE_sampmean=mean(lag_IE)
replace lag_IE=lag_IE-lag_IE_sampmean
egen lag_unc_sampmean=mean(lag_infl_iqr)
replace lag_infl_iqr=lag_infl_iqr-lag_unc_sampmean

* Center log monthly payments on the sample mean
egen log_payments_sampmean=mean(log_payments)
replace log_payments=log_payments-log_payments_sampmean
* Center log household income on the sample mean; log_new_faminc2 keeps the uncentered copy
egen log_inc_sampmean=mean(log_new_faminc)
gen log_new_faminc2 = log_new_faminc
replace log_new_faminc=log_new_faminc-log_inc_sampmean

sort prim_key quarter

* Declare the panel. The numeric id is prim_key with the ":" character removed.
* NOTE(review): removing the separator could map two different keys to the same number
* (e.g. "12:3" and "1:23"); confirm the keys cannot collide this way.
destring prim_key, generate(id_new) ignore(":")
xtset id_new quarter

* Within-person means of the time-varying independent variables. For variables that were
* recentered above, these are the within-household averages of the deviation from the
* sample mean. The k-th name in "means" is the by-id_new mean of the k-th name in "sources".
local sources d_inflmedian d_infliqr new_faminc log_new_faminc intrate_12m_up ///
	intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr mort ///
	log_payments hppoint howner
local means IE_bar IE_unc_bar new_faminc_bar log_new_faminc_bar intrate_up_bar ///
	intrate_down_bar unemp_up_bar unemp_down_bar rw_bar rw_unc_bar mort_bar ///
	log_payments_bar hp_bar howner_bar
local nmeans : word count `sources'
forvalues k = 1/`nmeans' {
	local from : word `k' of `sources'
	local to   : word `k' of `means'
	egen `to' = mean(`from'), by(id_new)
}
** Within-person means of lagged IE and lagged infl uncertainty are not needed: they are
** collinear with the within-person means of current IE and uncertainty

* Total durables spending within household (sum of durables over all rows of the same id_new).
* Author note: durables_real1 / durables_real2 could be used instead; the number of
* observations per household is identical across the three measures, so there is no need
* to repeat this for the alternate versions.
egen tot_durables=total(durables), by(id_new)
sum tot_durables, d
* Remove households whose durables spending sums to zero over the observed periods
* (author's count: 142 observations)
drop if tot_durables==0
* Constant 1 on every row, summed below to count observations per person
gen pre_obs=1
* Number of remaining observations per person (counted after the zero-spending drop above)
egen obs2=total(pre_obs), by(id_new)
sum obs2, d
* Sample flag: 1 when the person has at least 3 observations
gen durables_sample_hp=(obs2>=3)

* 1 if durcount is positive, 0 if it is zero or negative, missing when durcount is missing
gen boughtdur = (durcount>0) if durcount!=. //JN: Created dummy for bought any durable.

** Mortgage subsample: mortgage holders with an observed balance who are not
** coded as non-homeowners (a non-homeowner with a mortgage doesn't make sense)
keep if mort==1
drop if amtmort==.
drop if howner==0

** Log mortgage balance: log(amtmort) for positive balances, 0 for a zero balance,
** missing for negative ones
gen log_amtmort = cond(amtmort>0, log(amtmort), 0) if amtmort>=0

** Center the log balance on its sample mean
egen log_amtmort_sampmean = mean(log_amtmort)
replace log_amtmort = log_amtmort - log_amtmort_sampmean

** Within-household average of the (centered) log balance
egen log_amtmort_bar = mean(log_amtmort), by(id_new)

** Observations per person in this subsample: sum a constant 1 within id_new
gen pre_obs3 = 1
egen obs4 = total(pre_obs3), by(id_new)
summarize obs4, detail

** Sample flag: at least 3 observations per person
gen mort_samp = (obs4>=3)

label variable log_amtmort_bar "Log Mean Mort. Balance"
label variable log_amtmort     "Log Mort. Balance"

* Poisson GEE on real durables for the mortgage sample; it is run here so that
* e(sample) can be used to mark the estimation sample
xtgee durables_real2 d_inflmedian d_infliqr lag_IE lag_infl_iqr i.quarter ///
	[pweight=weight_samp] if mort_samp==1, ///
	family(poisson) link(log) corr(exch) vce(robust)
gen sampleg = e(sample)
count if sampleg==1

* ver==1 tags the rows of the quarterly estimation sample (missing elsewhere)
gen ver = 1 if sampleg==1

* Park the quarterly data in a temporary file
tempfile quarter
save `quarter'

********************************************************************************
** LOAD AND CLEAN DATA: NONDURABLES
********************************************************************************
use "../Data/matched_data_nondurables_jun2018_baseline.dta", clear

* Spending categories that add up to nondurables. This first version leaves out only
* sports, which is excluded because it contains durable goods.
local spendcats electricity water heatingfuel phonecable housecleaningproducts ///
	housecleaningservice gardenproducts gardenservice clothing personalcare ///
	drugs healthcareservices medsupplies entertainment hobbies personalservices ///
	otherchildspending foodhome foodout gasoline

* nondurables = row sum over the categories listed above
egen nondurables = rowtotal(`spendcats')

* Note that nondurables has one outlier above 29000 (monthly spending on nondurable
* goods/services). Before the regressions, extreme values are omitted and a small
* number of zeroes are dropped.

drop ethnicity

* Code 5 becomes 0 on these four items (presumably 5 = "no"; confirm with the codebook)
recode mort stocks retacct howner (5=0)

* Expectations variables from the Inflation surveys.
* Values not listed in a recode rule (e.g. "other") are carried over unchanged,
* so they stay in their own category.

* Unemployment expectation dummies
recode q6 (1=1) (2 3 = 0), gen(unemp_increase)
recode q6 (3=1) (1 2 = 0), gen(unemp_decrease)

* 12-month conditions
gen conditions_12m = q2a
recode q2a (1=1) (2 3 = 0), gen(conditions_12m_better)
recode q2a (2=1) (1 3 = 0), gen(conditions_12m_worse)

* 12-month interest rate expectation
gen interestrate_12m = q7
recode interestrate_12m (1=1) (2 3 = 0), gen(intrate_12m_up)
recode interestrate_12m (3=1) (1 2 = 0), gen(intrate_12m_down)

* 12-month business conditions
gen bconditions_12m = q4
recode q4 (1=1) (2 3 = 0), gen(bconditions_12m_better)
recode q4 (2=1) (1 3 = 0), gen(bconditions_12m_worse)

* House price forecast: 0 when q41==3, +q42 when q41==1, -q42 when q41==2,
* missing otherwise
gen hppoint = cond(q41==3, 0, cond(q41==1, q42, cond(q41==2, -q42, .)))
* Blank out extreme outliers (some answers are in the tens of thousands)
replace hppoint = . if abs(hppoint)>=200
label variable hppoint "House price expectation"

* Deflation with PCUN (nondurables CPI). A new variable is created so regressions can be
* run on nominal and real data alternatively.
* From Arman: PCUN was redefined in the haver do file to give 2012q1 dollars.

* The price file is keyed on date_monthly, so spend_month carries that name for the merge
rename spend_month date_monthly
sort date_monthly
merge m:1 date_monthly using "../Data/haver_m.dta"

tab _merge
* Discard months that exist only in the price file
keep if _merge!=2
drop _merge

* MB: simple deflation of the main nondurables measure
* (real values are only a little higher on average than nominal values)
gen nondurables_real = nondurables/PCUN
rename date_monthly spend_month

*** Prepare some additional descriptives
* Race isn't reported in all periods: compute the within-person mean of race
* and merge it back onto every row of that respondent.
preserve
collapse (mean) race , by(prim_key)
sort prim_key
tempfile race
save `race'
restore

drop race
sort prim_key
merge m:1 prim_key using `race'
tab _merge
drop _merge
drop if prim_key==""

* white = 1 when the person-level race value is exactly 1, 0 for any other nonmissing value
recode race (1=1) (nonmissing = 0), gen(white)
gen nonwhite = 1-white
recode gender (2=1) (1=0), gen(female)
* College indicator. The rule previously read (4 9 = 0), which is a list of the
* two codes 4 and 9, not a range: every other code below 10 was copied through
* unchanged, so coll (and later 1-coll) was not a 0/1 dummy for those rows.
* min/9 sends every code up to 9 to 0.
* NOTE(review): assumes all codes below 10 mean "no college"; confirm with the codebook.
recode highesteducation (min/9 = 0) (10/16 = 1), gen(coll)

* The q31s variables hold employment-status codes, one variable per status.
* MB: odd to generate an employed variable, because everyone in the sample is
* employed; however some also say they are retired (?)
gen employed = . 
replace employed=1 if q31s1==1
replace employed = 0 if (q31s2==2 | q31s3==3 | q31s4==4 | q31s5==5 | q31s6==6 | q31s7==7) & q31s1!=1


* Currently-retired indicator (replaces the retired variable shipped with the data).
* Odd that some are retired even though all are employed; how do retired people
* have wage growth expectations? They may be working a small job anyway.
drop retired 
gen retired = . 
replace retired = 1 if q31s5==5
replace retired = 0 if (q31s1==1 | q31s2==2 | q31s3==3 | q31s4==4 | q31s6==6 | q31s7==7) & q31s5!=5
tab retired

* Variable lists used further down
local expectations intrate_12m_up intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr hppoint
local infl1 d_inflmedian d_infliqr
local infl2 d_longinflmedian d_longinfliqr

* MB: when using lags, the lagged variables or these locals may need renaming to match
* (disabled) local lags1 "lagE_d_inflmedian lagE_d_infliqr"
* (disabled) local lags2 "lagE_d_longinflmedian lagE_d_longinfliqr"

* Specification titles
local spec1 "Short-Run Infl. Exp."
local spec2 "Medium-Run Infl. Exp."

label variable d_inflmedian "Inflation Expectations"
label variable d_infliqr    "Inflation Uncertainty"

* (disabled) replace howner=0 if howner==.
* Author notes: that recode adds ten people to homeowner status; some with howner_fix==0
* still have a positive mortgage payment but missing has-mortgage and/or mortgage amount.
* Two observations have howner==0 and mort==1; not recoded here.
* The mortgage dummy is non-missing for over 2000 observations, while the mortgage
* balance (amtmort) is observed for only 990 people.
* (disabled) tab mort

* Fill a missing mortgage indicator from other information: 0 for anyone not coded as a
* homeowner (including missing howner) or with a zero mortgage payment, 1 for every other
* row where the indicator is missing
replace mort = cond(howner!=1 | mortgage==0, 0, 1) if mort==.
label variable mort "Mortgage Indicator"

* MB: quasi-continuous income built from the midpoints of the annual household
* income brackets (familyincome and familyincome_part2)
rename familyincome       inc2
rename familyincome_part2 inc2_2
drop if inc2==.

* new_faminc: bracket midpoint in dollars. Brackets 1-13 come straight from inc2;
* the k-th entry of the list below is the midpoint assigned to inc2==k.
gen new_faminc=.
local midpoints 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500
local bracket = 0
foreach mid of local midpoints {
	local ++bracket
	replace new_faminc = `mid' if inc2 == `bracket'
}

* Bracket 14 is split further by inc2_2. One person has inc2==14 with inc2_2
* missing; they are assigned the lowest category above $75000.
replace new_faminc =  87500 if inc2==14 & (inc2_2==1 | inc2_2==.)
replace new_faminc = 112500 if inc2==14 & inc2_2==2
replace new_faminc = 162500 if inc2==14 & inc2_2==3
replace new_faminc = 237500 if inc2==14 & inc2_2==4

gen log_new_faminc = log(new_faminc)


* SAMPLE WEIGHTS for the monthly data. Warning: observations may be lost if the set of
* people needing weights has changed; regressions on non-employed types can then only be
* run unweighted.
* weight_samp = weights designed for the regression sample; the full-sample weights are
* still called weight_full.
sort prim_key spend_month
merge m:1 prim_key using ../Data/mweights_pooled
* Keep matched rows only (author's count: 195 observations had no weight_samp)
keep if _merge==3
drop _merge

* Define the regression sample before recentering any variables: drop extreme values of
* spending and inflation expectations, and mortgage payments that look like balances.
* Outlier identified by Ali, available as an alternative. MB: that prim_key doesn't look
* suspicious to me; the drop may have been related to the previous (bad) income variable.
* (disabled) drop if prim_key=="5041140:1"

* Mortgage payments above 200000 (missing payments are kept)
drop if mortgage>200000 & mortgage!=.
* Inflation expectation above 35 (author's count: 17 observations)
drop if d_inflmedian>35
* Zero nondurables (author's count: 13) and the single value above 28000
drop if nondurables==0 | nondurables>28000

* Locals for weights (can be turned on or off in a regression)
local weights "[pweight=weight_samp]"
local weights_full "[pweight=weight_full]"
local pwfile "_pw"

* Variable labels
la var d_inflmedian "Inflation Expectation"
la var d_infliqr "Inflation Uncertainty"
la var d_longinflmedian "Inflation Expectation"
la var d_longinfliqr "Inflation Uncertainty"
la var lag_IE "Lagged Inflation Expectation"
la var lag_infl_iqr "Lagged Infl. Uncertainty"

* Sum of monthly payments: mortgage + car for homeowners, rent + car otherwise.
* Interactions between this variable and IE may be included in some models.
gen payments=mortgage+car if howner==1
replace payments=rent+car if howner!=1
* Log payments, with log(0) set to 0; stays missing when payments is missing or negative
gen log_payments=.
replace log_payments=log(payments) if payments>0
replace log_payments=0 if payments==0
la var log_payments "Monthly Payments (Log)"
* Drop the one observation with an extreme value for payments (110,000); only relevant
* when running a regression that interacts with payments. Missing values compare as
* larger than any number in Stata, so the condition is guarded to avoid silently
* discarding rows whose payments are merely unobserved.
drop if payments>100000 & !missing(payments)


* Define the sample: remove rows with a (system) missing value in any variable below,
* checked in this order.
*  - lag_IE and lag_infl_iqr are only needed when lagged IE enters the regression
*  - new_faminc is the recode of familyincome / familyincome_part2; the former income
*    variable was earnings last month and highly unreliable
*  - alternative: skip the last two (hppoint costs 300+ observations, howner 111) and
*    omit howner and hppoint from the regressions
foreach v in weight_samp d_inflmedian d_infliqr nondurables lag_IE lag_infl_iqr ///
		new_faminc intrate_12m_up intrate_12m_down unemp_increase unemp_decrease ///
		rw_expect d_wageiqr rage nonwhite female coll retired mort hppoint howner {
	drop if `v'==.
}

* Center inflation expectations (and the lagged variables) on the sample mean.
* The means are unweighted; per the author this doesn't matter because the weighted mean
* is within 0.04 ppts of the unweighted mean IE.
egen IE_sampmean=mean(d_inflmedian) 
* d_inflmedian2 keeps an uncentered copy of d_inflmedian
gen d_inflmedian2 = d_inflmedian
replace d_inflmedian=d_inflmedian-IE_sampmean
egen lag_IE_sampmean=mean(lag_IE)
replace lag_IE=lag_IE-lag_IE_sampmean
egen lag_unc_sampmean=mean(lag_infl_iqr)
replace lag_infl_iqr=lag_infl_iqr-lag_unc_sampmean

* Center log monthly payments on the sample mean
egen log_payments_sampmean=mean(log_payments)
replace log_payments=log_payments-log_payments_sampmean
* Center log household income on the sample mean; log_new_faminc2 keeps the uncentered copy
egen log_inc_sampmean=mean(log_new_faminc)
gen log_new_faminc2 = log_new_faminc
replace log_new_faminc=log_new_faminc-log_inc_sampmean

sort prim_key spend_month

* Declare the panel. The numeric id is prim_key with the ":" character removed.
* NOTE(review): removing the separator could map two different keys to the same number
* (e.g. "12:3" and "1:23"); confirm the keys cannot collide this way.
destring prim_key, generate(id_new) ignore(":")
xtset id_new spend_month

* Within-person means of the time-varying independent variables. For variables that were
* recentered above, these are the within-household averages of the deviation from the
* sample mean. The k-th name in "means" is the by-id_new mean of the k-th name in "sources".
local sources d_inflmedian d_infliqr new_faminc log_new_faminc intrate_12m_up ///
	intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr hppoint ///
	howner mort log_payments
local means IE_bar IE_unc_bar new_faminc_bar log_new_faminc_bar intrate_up_bar ///
	intrate_down_bar unemp_up_bar unemp_down_bar rw_bar rw_unc_bar hp_bar ///
	howner_bar mort_bar log_payments_bar
local nmeans : word count `sources'
forvalues k = 1/`nmeans' {
	local from : word `k' of `sources'
	local to   : word `k' of `means'
	egen `to' = mean(`from'), by(id_new)
}
** Within-person means of lagged IE and lagged infl uncertainty (new), currently disabled:
* (disabled) egen lag_IE_bar=mean(lagE_d_inflmedian), by(id_new)
* (disabled) egen lag_IE_unc_bar=mean(lagE_d_infliqr), by(id_new)

* Constant 1 on every row, summed below to count observations per person
gen pre_obs=1
* Number of observations per person
egen obs=total(pre_obs), by(id_new)
sum obs, d
* Sample flag: 1 when the person has at least 3 observations
gen nondurables_sample_base=(obs>=3)

* Second count, built the same way on the same rows.
* NOTE(review): no rows are dropped between the two counts, so obs2 equals obs and
* nondurables_sample_hp equals nondurables_sample_base; both are kept.
gen pre_obs2=1
* Number of observations per person
egen obs2=total(pre_obs2), by(id_new)
sum obs2, d
* Sample flag: 1 when the person has at least 3 observations
gen nondurables_sample_hp=(obs2>=3)

** Mortgage subsample: mortgage holders with an observed balance who are not
** coded as non-homeowners
keep if mort==1
drop if amtmort==.
drop if howner==0

* Log mortgage balance: log(amtmort) for positive balances, 0 for a zero balance,
* missing for negative ones
gen log_amtmort = cond(amtmort>0, log(amtmort), 0) if amtmort>=0

* Center the log balance on its sample mean
egen log_amtmort_sampmean = mean(log_amtmort)
replace log_amtmort = log_amtmort - log_amtmort_sampmean

* Within-household average of the (centered) log balance
egen log_amtmort_bar = mean(log_amtmort), by(id_new)

* Observations per person in this subsample: sum a constant 1 within id_new
gen pre_obs3 = 1
egen obs4 = total(pre_obs3), by(id_new)
summarize obs4, detail

* Sample flag: at least 3 observations per person
gen mort_samp = (obs4>=3)

label variable log_amtmort_bar "Log Mean Mort. Balance"
label variable log_amtmort     "Log Mort. Balance"


* Poisson GEE on real nondurables for the mortgage sample; it is run here so that
* e(sample) can be used to mark the estimation sample
xtgee nondurables_real d_inflmedian d_infliqr lag_IE lag_infl_iqr i.spend_month ///
	[pweight=weight_samp] if mort_samp==1, ///
	family(poisson) link(log) corr(exch) vce(robust)
gen sampleg2 = e(sample)
count if sampleg2==1

* ver==2 tags the rows of the monthly estimation sample (missing elsewhere)
gen ver = 2 if sampleg2==1

* Save a copy of the monthly data; it also stays in memory
tempfile month
save `month'

********************************************************************************
** Combine Files
********************************************************************************
* Stack the quarterly file under the monthly data in memory
* (ver==1 marks the quarterly estimation sample, ver==2 the monthly one)
append using `quarter'

********************************************************************************
** Variables and Labels
********************************************************************************
** Reverse College
gen nocoll = 1-coll

** Keep Constant Variables Unique to Individual
* Age: within person (ordered by tsend) blank every value that repeats the previous row,
* so an age enters the statistics once per person rather than once per wave.
* The repeats are flagged on the ORIGINAL values first. Comparing inside the replace
* itself does not work: replace runs row by row, so rage[_n-1] would already be blank
* after a repeat and every other repeated value would survive (50,50,50 -> 50,.,50).
bysort ver id_new (tsend): gen byte rage_repeat = (rage==rage[_n-1])
replace rage = . if rage_repeat
drop rage_repeat
* Row number within person; countv==1 marks the first row of each person
bysort ver id_new  (tsend): gen countv = _n

* For each characteristic: take the person-level mean and keep it only on the
* person's first row (missing on all other rows), under the original variable name
foreach ii in nocoll retired howner mort {
	bysort ver id_new: egen `ii'2 = mean(`ii')
	gen `ii'3 = `ii'2 if countv==1
	drop `ii' `ii'2
	ren `ii'3 `ii'
}

** Label variables for tables

* Outcomes and household finances
label variable durables_real2   "Durables Spending"
label variable nondurables_real "Nondurables Spending"
label variable durcount         "Number of Durables Purchased"
label variable boughtdur        "Bought Durables"
label variable amtmort          "Mortgage Balance (\textdollar)"
label variable payments         "Monthly Payments (\textdollar)"
label variable log_payments     "Monthly Payments (Log)"
label variable new_faminc       "Household Income (Median)"
label variable log_new_faminc   "Household Income (Log)"
label variable log_new_faminc2  "Household Income (Log)"

* Inflation expectations
label variable d_inflmedian     "Inflation Expectation"
label variable d_inflmedian2    "Inflation Expectation"
label variable d_infliqr        "Inflation Uncertainty"
label variable d_longinflmedian "Inflation Expectation (MR)"
label variable d_longinfliqr    "Inflation Uncertainty (MR)"

* Other expectations
label variable hppoint          "House Price Growth Expectation"
label variable rw_expect        "Real Wage Growth Expectation"
label variable d_wageiqr        "Wage Growth Uncertainty"
label variable intrate_12m_up   "Expects Interest Rate Increase"
label variable intrate_12m_down "Expects Interest Rate Decrease"
label variable unemp_increase   "Expects Unemployment Increase"
label variable unemp_decrease   "Expects Unemployment Decrease"

* Person characteristics (labels carry a trailing asterisk)
label variable mort     "Has Mortgage*"
label variable howner   "Homeowner*"
label variable nocoll   "No College*"
label variable retired  "Retired*"
label variable rage     "Age*" 
label variable nonwhite "Non-White*"
label variable female   "Female*"

* Within-subject means
label variable log_new_faminc_bar "Mean Household Income (within-subject)"
label variable IE_bar             "Mean Inflation Expectation (within-subject)"
label variable IE_unc_bar         "Mean Inf. Uncertainty"
label variable log_payments_bar   "Mean Monthly Payments"
label variable unemp_up_bar       "Mean Unemp Increase"
label variable unemp_down_bar     "Mean Unemp Decrease"
label variable intrate_up_bar     "Mean Int Rates Increase" 
label variable intrate_down_bar   "Mean Int Rates Decrease"
label variable rw_bar             "Mean Real Wage Exp."
label variable rw_unc_bar         "Mean Wage Uncertainty (within-subject)"
label variable hp_bar             "Mean House Price Exp."
label variable howner_bar         "Mean Homeowner"
label variable mort_bar           "Mean Mortgage Indicator"

* Recall the variable lists defined earlier in this file:
*   infl1             d_inflmedian d_infliqr
*   infl2             d_longinflmedian d_longinfliqr
*   expectations      intrate_12m_up intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr hppoint
*   expectations_cons unemp_increase unemp_decrease rw_expect d_wageiqr hppoint
local weights "[pweight=weight_samp]"

* Swap the centered inflation expectation for the uncentered copy saved before centering
drop d_inflmedian 
rename d_inflmedian2 d_inflmedian

********************************************************************************
** Quarterly Sample
********************************************************************************
* (disabled) alternative estimation-sample definition using durables_sample_hp:
*   xtgee durables_real2 d_inflmedian d_infliqr i.quarter [pweight=weight_samp]
*         if durables_sample_hp==1, family(poisson) link(log) corr(exch) vce(robust)
*   gen sampleg = e(sample)
*   count if sampleg==1
* (disabled) check of the weighted median income for ver==2:
*   preserve / collapse (p50) new_faminc [weights] if ver==2 / su / restore

** Summary Statistics - Durables
* Payments are divided by 3 for this table
replace payments = payments/3

local durvars durables_real2 boughtdur `infl1' new_faminc amtmort payments `expectations' rage nonwhite female nocoll

* One row per variable: weighted mean and sd, then unweighted min and max,
* all rounded to 3 decimals
capture matrix drop durinds
foreach v of local durvars {

	* weighted mean and standard deviation on the quarterly sample
	mean `v' if ver==1 `weights'
	estat sd
	matrix tmp = r(mean) , r(sd)

	* range on the same rows
	sum `v' if ver==1
	matrix var_reg = round(tmp[1,1],.001) , round(tmp[1,2],.001) , round(r(min),.001) , round(r(max),.001)

	* stack the row under whatever has been collected so far
	matrix durinds = nullmat(durinds) \ var_reg
}
matrix rownames durinds = `durvars'


********************************************************************************
** Monthly Sample
********************************************************************************
* (disabled) alternative estimation-sample definition using nondurables_sample_hp:
*   xtgee nondurables_real d_inflmedian d_infliqr i.spend_month [pweight=weight_samp]
*         if nondurables_sample_hp==1, family(poisson) link(log) corr(exch) vce(robust)
*   gen sampleg2 = e(sample)
*   count if sampleg2==1

* Payments are multiplied by 3 for this table
replace payments = payments * 3
* Durables purchase variables are set to zero on the monthly rows
replace durcount = 0 if ver==2
replace boughtdur = 0 if ver==2

local nondurvars nondurables_real boughtdur `infl1' new_faminc amtmort payments `expectations' rage nonwhite female nocoll

* One row per variable: weighted mean and sd, then unweighted min and max,
* all rounded to 3 decimals
capture matrix drop nondurinds
foreach v of local nondurvars {

	* weighted mean and standard deviation on the monthly sample
	mean `v' if ver==2 `weights'
	estat sd
	matrix tmp1 = r(mean) , r(sd)

	* range on the same rows
	sum `v' if ver==2
	matrix var_reg2 = round(tmp1[1,1],.001) , round(tmp1[1,2],.001) , round(r(min),.001) , round(r(max),.001)

	* stack the row under whatever has been collected so far
	matrix nondurinds = nullmat(nondurinds) \ var_reg2
}

matrix rownames nondurinds = `nondurvars'


********************************************************************************
** Combine 
********************************************************************************
* Put the durables columns (mean, sd, min, max) next to the nondurables columns.
* Both matrices must have the same number of rows for the join to work.
matrix all = durinds , nondurinds 
mat list all

* Export the combined matrix as a LaTeX fragment.
* The substitute() option is a list of "from" "to" string pairs applied to the output:
*  - the first four pairs replace the column names c1-c4 with Mean/SD/Min/Max;
*  - the remaining pairs replace complete table rows with hand-edited versions
*    (spending split into separate durables and nondurables rows, other figures
*    re-entered by hand, "." in the nondurables columns for Bought Durables).
* NOTE(review): the "from" rows are hard-coded numbers. If the sample or any statistic
* changes they no longer match, the substitution silently does nothing, and the raw rows
* are written instead. The "to" rows must then be updated by hand as well.
#delimit ;
esttab matrix(all , fmt(%20.2fc)) using ../Tables/sumstats_mort.tex , 
replace style(tex) booktabs label 
varwidth(45) nomtit fragment msign(--) 
substitute("c1" "Mean" "c2" "SD" "c3" "Min" "c4" "Max"
"Durables Spending                            &      336.51&      914.74&        0.00&   12,942.66&    1,753.51&    1,016.21&      332.77&    9,544.67\\"
"Durables Spending (\textdollar)                       &      336.51&      914.74&        0.00&    12,942.66&           .&           .&           .&           .\\
Nondurables Spending (\textdollar) & . & . & . &  . & 1,753.51 & 1,016.21 & 332.77 & 9,544.67\\" 
"Household Income (Median)                    &   80,296.71&   44,185.44&    8,750.00&  237,500.00&   91,633.82&   49,151.96&    8,750.00&  237,500.00\\"
"Household Income (Median \textdollar)                    &    67,500&    44,185&     8,750&   237,500&    87,500&    49,152&     8,750&   237,500\\"
"Bought Durables                              &        0.38&        0.48&        0.00&        1.00&        0.00&        0.00&        0.00&        0.00\\"
"Bought Durables                              &        0.38&        0.48&        0.00&        1.00&        .&        .&        .&        .\\"
"Mortgage Balance (\textdollar)               &   99,160.66&   89,982.14&      400.00&1,100,000.00&  122,799.53&   97,521.40&      400.00&  835,000.00\\"
"Mortgage Balance (\textdollar)               &   99,161&   89,982&      400&1,100,000&  122,800&   97,521&      400&  835,000\\"
"Monthly Payments (\textdollar)               &    1,136.14&      788.47&        0.00&   10,519.67&    1,401.73&      870.15&        0.00&    9,754.00\\"
"Monthly Payments (\textdollar)               &    1,136&      788&        0&   10,520&    1,402&      870&        0&    9,754\\"
);
#delimit cr
